The existing vIOPL interface is hard to use, and need not be.
Introduce a VMASSIST with which a guest can opt in to vIOPL behaviour
consistent with native hardware.
Specifically:
- virtual iopl updated from do_iret() hypercalls.
- virtual iopl reported in bounce frames.
- guest kernels assumed to be level 0 for the purpose of iopl checks.
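The last point is the user-visible semantic change.  As a worked illustration
(assuming a guest kernel whose vIOPL is still 0, executing CLI and trapping
into the emulator):

    legacy:          requires iopl >= 1        -> 0 >= 1 fails, #GP bounced to the guest
    architectural:   requires IOPL(0) <= iopl  -> 0 <= 0 passes, CLI emulated, matching
                     native hardware where CPL0 code is never subject to IOPL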
v->arch.pv_vcpu.iopl is altered to store the IOPL shifted as it would exist
in eflags, for the benefit of the assembly code.
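A guest kernel would enable the assist once at boot via the standard vm_assist
hypercall.  The sketch below uses the Linux wrapper name and header paths;
xen_enable_architectural_iopl() is a hypothetical helper, not part of this patch:

    #include <xen/interface/xen.h>    /* VMASST_CMD_enable, VMASST_TYPE_* */
    #include <asm/xen/hypercall.h>    /* HYPERVISOR_vm_assist()           */

    /* Hypothetical helper: opt in to architectural vIOPL behaviour. */
    static int xen_enable_architectural_iopl(void)
    {
        /* Returns non-zero on hypervisors which predate the assist. */
        return HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                    VMASST_TYPE_architectural_iopl);
    }

With the representation change, a PHYSDEVOP_set_iopl value of e.g. 3 is stored
as MASK_INSR(3, X86_EFLAGS_IOPL) == 0x3000, so the assembly paths can OR it
straight into the bounce frame's eflags.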
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
init_int80_direct_trap(v);
/* IOPL privileges are virtualised. */
- v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
+ v->arch.pv_vcpu.iopl = v->arch.user_regs.eflags & X86_EFLAGS_IOPL;
v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
/* Ensure real hardware interrupts are enabled. */
cs_and_mask = (unsigned short)regs->cs |
((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
/* Fold upcall mask into RFLAGS.IF. */
- eflags = regs->_eflags & ~X86_EFLAGS_IF;
+ eflags = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+ if ( VM_ASSIST(n->domain, architectural_iopl) )
+ eflags |= n->arch.pv_vcpu.iopl;
if ( !ring_1(regs) )
{
vcpu_info(n, evtchn_upcall_mask) = 1;
regs->entry_vector |= TRAP_syscall;
- regs->_eflags &= 0xFFFCBEFFUL;
+ regs->_eflags &= ~(X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT|
+ X86_EFLAGS_IOPL|X86_EFLAGS_TF);
regs->ss = FLAT_COMPAT_KERNEL_SS;
regs->_esp = (unsigned long)(esp-7);
regs->cs = FLAT_COMPAT_KERNEL_CS;
((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
/* Fold upcall mask into RFLAGS.IF. */
- rflags = regs->rflags & ~X86_EFLAGS_IF;
+ rflags = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+ if ( VM_ASSIST(n->domain, architectural_iopl) )
+ rflags |= n->arch.pv_vcpu.iopl;
    if ( put_user(regs->ss, rsp - 1) |
         put_user(regs->rsp, rsp - 2) |
regs->entry_vector |= TRAP_syscall;
regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
- X86_EFLAGS_NT|X86_EFLAGS_TF);
+ X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
regs->ss = FLAT_KERNEL_SS;
regs->rsp = (unsigned long)(rsp-11);
regs->cs = FLAT_KERNEL_CS;
if ( set_iopl.iopl > 3 )
break;
ret = 0;
- curr->arch.pv_vcpu.iopl = set_iopl.iopl;
+ curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL);
break;
}
return 1;
}
+/* Perform IOPL check between the vcpu's shadowed IOPL, and the assumed cpl. */
+static bool_t iopl_ok(const struct vcpu *v, const struct cpu_user_regs *regs)
+{
+ unsigned int cpl = guest_kernel_mode(v, regs) ?
+ (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;
+
+ ASSERT((v->arch.pv_vcpu.iopl & ~X86_EFLAGS_IOPL) == 0);
+
+ return IOPL(cpl) <= v->arch.pv_vcpu.iopl;
+}
+
/* Has the guest requested sufficient permission for this I/O access? */
static int guest_io_okay(
unsigned int port, unsigned int bytes,
int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
- if ( v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) )
+ if ( iopl_ok(v, regs) )
return 1;
if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
case 0xfa: /* CLI */
case 0xfb: /* STI */
- if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
+ if ( !iopl_ok(v, regs) )
goto fail;
/*
* This is just too dangerous to allow, in my opinion. Consider if the
OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt);
OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp);
OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
+ OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
OFFSET(MB_flags, multiboot_info_t, flags);
OFFSET(MB_cmdline, multiboot_info_t, cmdline);
OFFSET(MB_mem_lower, multiboot_info_t, mem_lower);
+
+ OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
}
movl UREGS_rsp+8(%rsp),%esi
.Lft4: mov UREGS_ss+8(%rsp),%fs
2:
+ movq VCPU_domain(%rbx),%r8
subl $3*4,%esi
movq VCPU_vcpu_info(%rbx),%rax
pushq COMPAT_VCPUINFO_upcall_mask(%rax)
testb %al,%al # Bits 0-7: saved_upcall_mask
setz %ch # %ch == !saved_upcall_mask
movl UREGS_eflags+8(%rsp),%eax
- andl $~X86_EFLAGS_IF,%eax
+ andl $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
addb %ch,%ch # Bit 9 (EFLAGS.IF)
orb %ch,%ah # Fold EFLAGS.IF into %eax
+ xorl %ecx,%ecx # if ( VM_ASSIST(v->domain, architectural_iopl) )
+ testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%r8)
+ cmovnzl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL)
+ orl %ecx,%eax # Fold EFLAGS.IOPL into %eax
.Lft6: movl %eax,%fs:2*4(%rsi) # EFLAGS
movl UREGS_rip+8(%rsp),%eax
.Lft7: movl %eax,%fs:(%rsi) # EIP
domain_crash(v->domain);
return 0;
}
+
+ if ( VM_ASSIST(v->domain, architectural_iopl) )
+ v->arch.pv_vcpu.iopl = eflags & X86_EFLAGS_IOPL;
+
regs->_eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
if ( unlikely(eflags & X86_EFLAGS_VM) )
subq $40,%rsi
movq UREGS_ss+8(%rsp),%rax
ASM_STAC
+ movq VCPU_domain(%rbx),%rdi
.Lft2: movq %rax,32(%rsi) # SS
movq UREGS_rsp+8(%rsp),%rax
.Lft3: movq %rax,24(%rsi) # RSP
testb $0xFF,%al # Bits 0-7: saved_upcall_mask
setz %ch # %ch == !saved_upcall_mask
movl UREGS_eflags+8(%rsp),%eax
- andl $~X86_EFLAGS_IF,%eax
+ andl $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
addb %ch,%ch # Bit 9 (EFLAGS.IF)
orb %ch,%ah # Fold EFLAGS.IF into %eax
+ xorl %ecx,%ecx # if ( VM_ASSIST(v->domain, architectural_iopl) )
+ testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rdi)
+ cmovnzl VCPU_iopl(%rbx),%ecx # Bits 13:12 (EFLAGS.IOPL)
+ orl %ecx,%eax # Fold EFLAGS.IOPL into %eax
.Lft5: movq %rax,16(%rsi) # RFLAGS
movq UREGS_rip+8(%rsp),%rax
.Lft6: movq %rax,(%rsi) # RIP
toggle_guest_mode(v);
}
+ if ( VM_ASSIST(v->domain, architectural_iopl) )
+ v->arch.pv_vcpu.iopl = iret_saved.rflags & X86_EFLAGS_IOPL;
+
regs->rip = iret_saved.rip;
regs->cs = iret_saved.cs | 3; /* force guest privilege */
regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
(1UL << VMASST_TYPE_4gb_segments_notify) | \
(1UL << VMASST_TYPE_writable_pagetables) | \
(1UL << VMASST_TYPE_pae_extended_cr3) | \
+ (1UL << VMASST_TYPE_architectural_iopl) | \
(1UL << VMASST_TYPE_m2p_strict))
#define VM_ASSIST_VALID NATIVE_VM_ASSIST_VALID
#define COMPAT_VM_ASSIST_VALID (NATIVE_VM_ASSIST_VALID & \
/* I/O-port access bitmap. */
XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
unsigned int iobmp_limit; /* Number of ports represented in the bitmap. */
- unsigned int iopl; /* Current IOPL for this VCPU. */
+#define IOPL(val) MASK_INSR(val, X86_EFLAGS_IOPL)
+ unsigned int iopl; /* Current IOPL for this VCPU, shifted left by
+ * 12 to match the eflags register. */
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
/* x86/PAE guests: support PDPTs above 4GB. */
#define VMASST_TYPE_pae_extended_cr3 3
+/*
+ * x86 guests: Sane behaviour for virtual iopl
+ * - virtual iopl updated from do_iret() hypercalls.
+ * - virtual iopl reported in bounce frames.
+ * - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl 4
+
/*
* x86/64 guests: strictly hide M2P from user mode.
* This allows the guest to control respective hypervisor behavior: